GSE149524

there’re 4 batches

two P21 replicates for neuron atlas

one E15.5 and one E18.5 for trajectory analysis

load dependancies

load 10x data

GEX <- Read10X(data.dir = "../ref.GSE149524/GSE149524_RAW/GSM4504451_P21_2/")

check datasets

dim(GEX)
## [1] 27998  4220
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGTATGAAC-1 AAACCCAAGTCGGGAT-1 AAACCCACATGACTTG-1
## Xkr4                     .                  .                  2
## Gm1992                   .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Rp1.1                    .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCACATTGGGAG-1 AAACCCATCTCCTACG-1 AAACCCATCTTAATCC-1
## Xkr4                     .                  1                  .
## Gm1992                   .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Rp1.1                    .                  .                  .
## Sox17                    .                  .                  .

GEX

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "P21.rep2")
GEX.seur
## An object of class Seurat 
## 17800 features across 4220 samples within 1 assay 
## Active assay: RNA (17800 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
grep("^Rps|^Rpl",rownames(GEX),value = T)
##   [1] "Rpl7"       "Rpl31"      "Rpl37a"     "Rps6kc1"    "Rpl7a"     
##   [6] "Rpl12"      "Rpl35"      "Rps21"      "Rpl39"      "Rpl10"     
##  [11] "Rps4x"      "Rps6ka6"    "Rpl36a"     "Rps6ka3"    "Rpl22l1"   
##  [16] "Rps3a1"     "Rps27"      "Rpl34"      "Rps20"      "Rps6"      
##  [21] "Rps8"       "Rps6ka1"    "Rpl11"      "Rpl22"      "Rpl9"      
##  [26] "Rpl5"       "Rplp0"      "Rpl6"       "Rpl21"      "Rpl32"     
##  [31] "Rps9"       "Rpl28"      "Rps5"       "Rps19"      "Rps16"     
##  [36] "Rps11"      "Rpl13a"     "Rpl18"      "Rps17"      "Rps3"      
##  [41] "Rpl27a"     "Rps13"      "Rps15a"     "Rplp2"      "Rps12"     
##  [46] "Rps15"      "Rpl6l"      "Rpl41"      "Rps26"      "Rpl18a"    
##  [51] "Rps18-ps3"  "Rps26-ps1"  "Rpl13"      "Rpl21-ps4"  "Rpl15"     
##  [56] "Rps24"      "Rpl23a-ps3" "Rpl13-ps3"  "Rps2-ps6"   "Rps25"     
##  [61] "Rpl10-ps3"  "Rplp1"      "Rpl4"       "Rps27l"     "Rpl29"     
##  [66] "Rps27rt"    "Rpsa"       "Rpl14"      "Rps27a"     "Rpl26"     
##  [71] "Rpl23a"     "Rpl9-ps1"   "Rps6kb1"    "Rpl23"      "Rpl19"     
##  [76] "Rpl27"      "Rpl38"      "Rps23"      "Rpl36-ps3"  "Rps7"      
##  [81] "Rpl10l"     "Rps29"      "Rpl36al"    "Rps6kl1"    "Rps6ka5"   
##  [86] "Rpl37"      "Rpl30"      "Rpl7a-ps3"  "Rpl8"       "Rpl3"      
##  [91] "Rps19bp1"   "Rpl39l"     "Rpl35a"     "Rpl24"      "Rps6ka2"   
##  [96] "Rps2"       "Rpl3l"      "Rps10"      "Rpl10a"     "Rps28"     
## [101] "Rps18"      "Rpl7l1"     "Rpl36"      "Rpl7a-ps5"  "Rpl36-ps4" 
## [106] "Rpl27-ps3"  "Rps14"      "Rpl17"      "Rps6kb2"    "Rps6ka4"   
## [111] "Rpl9-ps6"   "Rpl13a-ps1" "Rps12-ps3"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

plota1 <- FeatureScatter(GEX.seur, feature1 = "nFeature_RNA", feature2 = "percent.mt") 
plotb1 <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc1 <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota1 + plotb1 + plotc1

filtering

GEX.seur <- subset(GEX.seur, subset = nFeature_RNA > 1000 & nFeature_RNA < 8000 & nCount_RNA < 60000 & percent.mt < 10)
GEX.seur
## An object of class Seurat 
## 17800 features across 2406 samples within 1 assay 
## Active assay: RNA (17800 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

plota1 <- FeatureScatter(GEX.seur, feature1 = "nFeature_RNA", feature2 = "percent.mt") 
plotb1 <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc1 <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota1 + plotb1 + plotc1

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
## Warning: The following features are not present in the object: Pimreg, Jpt1,
## Cdc25c, not searching for symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), #group.by = "FB.info", 
    ncol = 2, pt.size = 0.01)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

nearly no cycling !

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 100)
##   [1] "Gal"      "Cartpt"   "Cck"      "Penk"     "Vip"      "Pcp4l1"  
##   [7] "Calcb"    "Nnat"     "Nefl"     "Scgn"     "Apoe"     "Sst"     
##  [13] "Ntng1"    "Npy"      "Sdpr"     "Ucn3"     "Grp"      "Krt19"   
##  [19] "Crabp1"   "Csrp2"    "Nefm"     "Ly6c1"    "Bglap"    "Sparc"   
##  [25] "Mgat4c"   "Dbh"      "Mt3"      "Cpne4"    "Slc17a6"  "Tac1"    
##  [31] "Calb1"    "Nmu"      "Ebf1"     "Adgrg6"   "Ifitm3"   "Myl1"    
##  [37] "Neurod6"  "Tmeff2"   "Rbp4"     "Bglap2"   "Fst"      "Adcyap1" 
##  [43] "Igfbp7"   "Ifi27l2a" "Nxph2"    "Paip2b"   "Col3a1"   "Serpine2"
##  [49] "Serpini1" "Fxyd1"    "Itih5"    "Abca9"    "Oas1d"    "Matn2"   
##  [55] "Gad2"     "Agrp"     "Entpd2"   "Zfp804a"  "Pcdh10"   "Cbln2"   
##  [61] "Phlda1"   "Rarres2"  "Skap1"    "Arpc1b"   "Ndufa4l2" "Cdkn1c"  
##  [67] "Lgals3"   "Epcam"    "Gfap"     "Hspa1a"   "Sparcl1"  "Isg15"   
##  [73] "Pmepa1"   "Ntrk3"    "Col9a2"   "Cidea"    "Col12a1"  "Rprml"   
##  [79] "Nog"      "Camp"     "Tspan8"   "Cd24a"    "Timp4"    "Cox8b"   
##  [85] "Fabp7"    "Gm26772"  "Tcap"     "AI593442" "Cntn5"    "Lpar1"   
##  [91] "Postn"    "Vstm2a"   "Avil"     "Tulp3"    "Brinp3"   "Pcp4"    
##  [97] "Igfbp4"   "Col18a1"  "Plp1"     "Scn7a"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|Egr1|RP23-|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AI|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


GEX.seur <- RunPCA(GEX.seur, features = setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) ))
## PC_ 1 
## Positive:  Apoe, Rarres2, Fxyd1, Lpar1, Sparc, Col18a1, Col12a1, Nid1, Gsn, Entpd2 
##     Sdc4, Atp1a2, Ifitm3, Plp1, Gpm6b, Gpr37l1, Pmepa1, Tmprss5, Qk, Cmtm5 
##     Nkain4, Timp3, Ttyh1, Col3a1, Serpinh1, Dbi, Arpc1b, Foxd3, Cdh19, Gjc3 
## Negative:  Prph, Pcsk1n, Bex2, Snhg11, Tubb2b, Resp18, Syt1, Mllt11, Tubb3, Nsg1 
##     Phox2a, Gap43, Zcchc12, Aplp1, Scg2, Lix1, Nrsn1, Tagln3, Fabp5, Tm4sf4 
##     Fgf13, Map1b, Fxyd7, Ptprn, Chga, Syt4, Fdps, Ncoa7, Tm4sf1, Olfm1 
## PC_ 2 
## Positive:  Etv1, Nos1, Rprml, Vip, Gal, Fxyd5, C1ql1, Tesc, Tbx3, Kitl 
##     Alcam, Auts2, Cartpt, Stxbp6, Tmem176a, Cadps2, Rit2, Nxn, Dgkb, Arhgap15 
##     Tspan13, Slc35d3, Qdpr, Tes, Tmem108, Ass1, Fam89a, Cpne2, Ltk, Mfsd4 
## Negative:  Dmkn, Calb2, Oprk1, Slc18a3, Satb1, Chgb, Tshz2, Ly6e, Rab3b, Brinp2 
##     Fam19a5, Dsc3, Tac1, Ffar3, Ndufa4l2, Sncb, Casz1, Pi15, Prmt8, Plcxd3 
##     Tcf7l2, Il11ra1, Aqp1, Ifitm2, Folr1, Rbfox1, Pdpn, Cbln1, Psd3, Fam19a1 
## PC_ 3 
## Positive:  S100a16, S100a4, Ncald, Nos1, Ass1, Cryab, Rprml, Bglap, Rbp4, Bglap2 
##     Ppp1r14c, C1ql1, Cox8b, Kitl, Cygb, Npy, Mfsd4, Tmem255b, Stxbp6, Aldh1a3 
##     Adamts5, Kcnab1, Gal, Tes, Abcb1b, Qdpr, Chga, Slc35d3, Enpp1, Il11ra1 
## Negative:  Id4, Tmeff2, Serpini1, Pbx3, Nrsn2, Myl1, Mt3, Cd24a, Rab27b, Klhl1 
##     Spock3, Nefl, Mgat4c, Nefm, Nnat, Avil, Ntrk3, Mrap2, Scgn, Kcnip4 
##     Slc17a6, Adra2a, Pcdh10, Snx7, Enc1, Kcnk2, Skap1, Amigo2, Kctd4, Kcng1 
## PC_ 4 
## Positive:  Basp1, Ddah1, Ntrk3, P2rx2, Tcf7l2, Avil, Id2, Cpne4, Scube1, Scg2 
##     Ifi27, Scgn, Dgkg, Kcnb2, Rph3a, Tmeff2, Ano2, Nt5dc2, Krt19, Id1 
##     Calb2, Ly6e, Prune2, Slc4a4, Calcb, Plxna4, Dapk2, Gcgr, Sh3gl3, Oas1a 
## Negative:  Gda, Ndst4, Htr2b, Ptn, Kcnip4, Tac1, Pgm5, Grem2, Pdlim3, Abca5 
##     Kctd8, Csmd3, Lin7a, S100a1, Nxph1, Skap2, Socs2, Chchd10, Rwdd3, Cst12 
##     Bend5, Epha5, Asic4, Fhad1, Dock10, Ogfrl1, Smyd3, Cd79a, Rogdi, Edil3 
## PC_ 5 
## Positive:  Dapk2, Nmu, Krt19, Nog, Cbln2, Ano2, Cpne4, Syt15, Zfp804a, Cysltr2 
##     Slc25a48, Scn11a, Atoh8, Edn1, Pcdh10, Grin3a, Layn, Adgrg6, Calcb, Gpr149 
##     Sdsl, Cidea, Astn2, Necab1, Wif1, Tbx2, Npas1, Rph3a, Hpca, Galr1 
## Negative:  Nefl, Nefm, Pcp4l1, Gna14, Nxph2, Ntng1, Slc17a6, Fam122b, Phlda1, Csrp2 
##     Npy5r, Adcyap1, Calb1, March1, Pbx3, Ddah1, Klhl1, Kcng1, Ltk, Ush1c 
##     Nrip3, Bmpr1b, Npy1r, Trps1, Nrp1, Tesc, Pcdh7, Dbh, Vstm2a, Abca9
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG, CC_gene) ))
## [1] 1379
head(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG, CC_gene) ),300)
##   [1] "Gal"      "Cartpt"   "Cck"      "Penk"     "Vip"      "Pcp4l1"  
##   [7] "Calcb"    "Nnat"     "Nefl"     "Scgn"     "Apoe"     "Sst"     
##  [13] "Ntng1"    "Npy"      "Sdpr"     "Ucn3"     "Grp"      "Krt19"   
##  [19] "Crabp1"   "Csrp2"    "Nefm"     "Ly6c1"    "Bglap"    "Sparc"   
##  [25] "Mgat4c"   "Dbh"      "Mt3"      "Cpne4"    "Slc17a6"  "Tac1"    
##  [31] "Calb1"    "Nmu"      "Ebf1"     "Adgrg6"   "Ifitm3"   "Myl1"    
##  [37] "Neurod6"  "Tmeff2"   "Rbp4"     "Bglap2"   "Fst"      "Adcyap1" 
##  [43] "Igfbp7"   "Ifi27l2a" "Nxph2"    "Paip2b"   "Col3a1"   "Serpine2"
##  [49] "Serpini1" "Fxyd1"    "Itih5"    "Abca9"    "Oas1d"    "Matn2"   
##  [55] "Gad2"     "Agrp"     "Entpd2"   "Zfp804a"  "Pcdh10"   "Cbln2"   
##  [61] "Phlda1"   "Rarres2"  "Skap1"    "Arpc1b"   "Ndufa4l2" "Cdkn1c"  
##  [67] "Lgals3"   "Epcam"    "Gfap"     "Sparcl1"  "Isg15"    "Pmepa1"  
##  [73] "Ntrk3"    "Col9a2"   "Cidea"    "Col12a1"  "Rprml"    "Nog"     
##  [79] "Camp"     "Tspan8"   "Cd24a"    "Timp4"    "Cox8b"    "Fabp7"   
##  [85] "Tcap"     "Cntn5"    "Lpar1"    "Postn"    "Vstm2a"   "Avil"    
##  [91] "Tulp3"    "Brinp3"   "Pcp4"     "Igfbp4"   "Col18a1"  "Plp1"    
##  [97] "Scn7a"    "Cckar"    "Dapk2"    "Gsn"      "Vim"      "Ecel1"   
## [103] "Gpr149"   "C1ql1"    "Nos1"     "Sostdc1"  "Htr2b"    "Sfrp1"   
## [109] "Pkib"     "Kcna1"    "Id3"      "Pdyn"     "Tesc"     "Gng11"   
## [115] "Calb2"    "C1ql2"    "Cx3cr1"   "Cst12"    "Etv1"     "Ghrh"    
## [121] "Pnoc"     "Gna14"    "Gng2"     "Galr1"    "Ass1"     "Klhl1"   
## [127] "Prune2"   "Nid1"     "Moxd1"    "Pbx3"     "Htr3a"    "Pyy"     
## [133] "Lepr"     "Nrip3"    "Gpm6b"    "Nrn1"     "Otor"     "Itm2a"   
## [139] "Ano2"     "Col1a2"   "Fam122b"  "Islr2"    "Chodl"    "Fxyd3"   
## [145] "Timp3"    "Id1"      "Fam159b"  "Rprm"     "Atf3"     "Omp"     
## [151] "Iigp1"    "Gng8"     "Snca"     "Thy1"     "Cmtm5"    "Cxcl10"  
## [157] "Ifi203"   "Kcnb2"    "Efr3a"    "Krt8"     "Edn1"     "Nkain4"  
## [163] "Hoxb7"    "Foxd3"    "Isl1"     "Gpr37l1"  "Sfrp5"    "S100b"   
## [169] "Dbi"      "Tmprss5"  "Atp1a2"   "Spock3"   "Rab27b"   "Fam89a"  
## [175] "Robo2"    "Id4"      "Cdh9"     "Kcnk2"    "Tmc3"     "Upp1"    
## [181] "Col1a1"   "Sema5a"   "H2-Q2"    "Gda"      "Nmrk1"    "Phgdh"   
## [187] "Basp1"    "Sdc4"     "Gsta4"    "Peg10"    "Art3"     "Col11a1" 
## [193] "Nrp2"     "Rph3a"    "Hoxa7"    "Myl9"     "Bdnf"     "Sag"     
## [199] "Grin3a"   "Vsnl1"    "Serpinh1" "Cpxm2"    "Dgkg"     "Mgll"    
## [205] "Ttyh1"    "Pcdh9"    "Id2"      "Alcam"    "Th"       "Gip"     
## [211] "Nrsn2"    "Plac8"    "Npy5r"    "Poc1a"    "Ddah1"    "Tmem35"  
## [217] "Layn"     "Lrat"     "Ppy"      "Cdkn1a"   "Abhd11os" "Syt15"   
## [223] "Col20a1"  "Exosc2"   "Hoxb8"    "Lypd1"    "F2r"      "Fbln2"   
## [229] "Maf"      "Kcnip4"   "Ttr"      "Neurod1"  "Calca"    "Fxyd5"   
## [235] "Stxbp6"   "Rbp1"     "Rerg"     "Prss56"   "Ngfr"     "Nrgn"    
## [241] "Cygb"     "Spink8"   "Serpinf1" "Xist"     "Pcsk1"    "Bambi"   
## [247] "Ccnd1"    "Akap7"    "Tbx3"     "Cldn7"    "Aard"     "Parm1"   
## [253] "Snx7"     "Cadps2"   "Ush1c"    "Acp6"     "S100a11"  "Sult1d1" 
## [259] "Ifit1"    "Nhlh2"    "Ucp2"     "Slc6a4"   "Igfbp2"   "Nkain3"  
## [265] "Chga"     "Vcam1"    "Npy1r"    "Prkcdbp"  "Nrp1"     "Synpr"   
## [271] "Fgl2"     "Vcan"     "Ltk"      "Mal"      "Kitl"     "Fam46a"  
## [277] "Tbx2"     "Adora1"   "Txnip"    "Resp18"   "Cd79a"    "Pdlim2"  
## [283] "Clec1a"   "Cpne8"    "Ddc"      "Hnmt"     "Grm5"     "Cst3"    
## [289] "Oprk1"    "Frzb"     "Ptgfr"    "Gfral"    "Pdzrn4"   "Lbp"     
## [295] "Bgn"      "Asb2"     "Mrap2"    "Ly6e"     "Cthrc1"   "Acta2"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:24
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param =15)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 1.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2406
## Number of edges: 69711
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8140
## Number of communities: 22
## Elapsed time: 0 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity=100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 135)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 21:34:28 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:34:28 Read 2406 rows and found 24 numeric columns
## 21:34:28 Using Annoy for neighbor search, n_neighbors = 20
## 21:34:28 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:34:28 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpaoHrro\file76f0279611b0
## 21:34:28 Searching Annoy index using 1 thread, search_k = 2000
## 21:34:29 Annoy recall = 100%
## 21:34:29 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 21:34:30 Initializing from normalized Laplacian + noise (using irlba)
## 21:34:30 Commencing optimization for 500 epochs, with 63104 positive edges
## 21:34:36 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

markers.neur <- list(PEMN=c("Chat","Tac1","Drd2"),
                     PIMN=c("Nos1","Vip","Adm","Lgr5"),
                     PIN=c("Penk","Prlr","Mtnr1a"),
                     PSN=c("Calca","Calcb","Cck","Bdnf",
                           "Piezo2","Nog","Nmu","Sst","Il4ra",
                           "Il13ra1","Il7"),
                     PSVN=c("Glp2r","Fst","Csf2rb","Csf2rb2"),
                     Glia=c("Plp1","Gfap","Rxrg"),  # add three more
                     mosue=c("Fos","Actl6a","Actl6b","Phox2b","Sox10","Mki67","Top2a")  # Baf53
                     )
unlist(markers.neur)
##     PEMN1     PEMN2     PEMN3     PIMN1     PIMN2     PIMN3     PIMN4      PIN1 
##    "Chat"    "Tac1"    "Drd2"    "Nos1"     "Vip"     "Adm"    "Lgr5"    "Penk" 
##      PIN2      PIN3      PSN1      PSN2      PSN3      PSN4      PSN5      PSN6 
##    "Prlr"  "Mtnr1a"   "Calca"   "Calcb"     "Cck"    "Bdnf"  "Piezo2"     "Nog" 
##      PSN7      PSN8      PSN9     PSN10     PSN11     PSVN1     PSVN2     PSVN3 
##     "Nmu"     "Sst"   "Il4ra" "Il13ra1"     "Il7"   "Glp2r"     "Fst"  "Csf2rb" 
##     PSVN4     Glia1     Glia2     Glia3    mosue1    mosue2    mosue3    mosue4 
## "Csf2rb2"    "Plp1"    "Gfap"    "Rxrg"     "Fos"  "Actl6a"  "Actl6b"  "Phox2b" 
##    mosue5    mosue6    mosue7 
##   "Sox10"   "Mki67"   "Top2a"
DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "seurat_clusters") + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: Drd2, Csf2rb2

DoubletFinder

library(DoubletFinder)
## 载入需要的程辑包:KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## 载入需要的程辑包:ROCR

## NULL
## [1] "Creating 802 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 802 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."

markers

figure2

check.fig2 <- list(Neurotransmission=c("Oprk1","Adrb2","Cckar","Htr2b",
                                       "Gucy2g","Galr1","Vipr2","Grin3a",
                                       "Adora1","Crhr2","Chrna2","Tacr3",
                                       "Gabrg3","Nmur2","Grm5","P2ry6",
                                       "Galr2","Sstr2","Gabre","Npy5r",
                                       "Npy1r","Sstr5"),
                   Othersignaling=c("Cxcl12","Fgfr2","Ntrk2","Egfr",
                                    "Nmbr","Ptgfr","Pgf","Edn1",
                                    "Kit","Prokr2","Islr2","Gfral",
                                    "Mc4r","Bdnf","Kitl","Gfra1",
                                    "Tgfbr2","Ednra","Prokr1","Bmpr1b"),
                   Ionchannels=c("Kcns3","Ano10","Kcnip4","Kcnip1",
                                 "Kcnn2","Kcnn3","Ano2","Ano8",
                                 "Kcna2","Scn1a","Kcna5","Kcnab1",
                                 "Cacna1i","Kcnd2","Kcnv1",
                                 "Cacng5","Piezo2","Piezo1"),   # Peizo1 manually added 
                   Adhesionmolecules=c("Ly6c1","Itgb5","Sema3e","Ntn1",
                                       "Slitrk4","Itga6","Cdh8","Ptpru",
                                       "Itgb6","Unc5b","Avil","Sema5a",
                                       "Epha8","Cdh7","Itga1","Ephb6",
                                       "Flrt2","Nxph2","Ntng1"),
                   Transcriptionfactors=c("Satb1","Ebf3","Bnc2","Nfatc1",
                                          "Zfp503","Satb2","Cux2","Dlx3",
                                          "Atoh8","Zfp804a","Ebf2","Pbx3",
                                          "Meis1","Etv1","St18","Ebf1",
                                          "Neurod6","Trps1","Zfp800","Onecut2",
                                          "Phox2a","Phox2b"),
                   AnnexinsCopines=c("Anxa4","Anxa3","Anxa11","Anxa2",
                                     "Anxa5","Anxa6","Anxa7","Cpne4",
                                     "Cpne5","Cpne7","Cpne2","Cpne3",
                                     "Cpne8"))
names.fig2 <- names(check.fig2)
check.fig2
## $Neurotransmission
##  [1] "Oprk1"  "Adrb2"  "Cckar"  "Htr2b"  "Gucy2g" "Galr1"  "Vipr2"  "Grin3a"
##  [9] "Adora1" "Crhr2"  "Chrna2" "Tacr3"  "Gabrg3" "Nmur2"  "Grm5"   "P2ry6" 
## [17] "Galr2"  "Sstr2"  "Gabre"  "Npy5r"  "Npy1r"  "Sstr5" 
## 
## $Othersignaling
##  [1] "Cxcl12" "Fgfr2"  "Ntrk2"  "Egfr"   "Nmbr"   "Ptgfr"  "Pgf"    "Edn1"  
##  [9] "Kit"    "Prokr2" "Islr2"  "Gfral"  "Mc4r"   "Bdnf"   "Kitl"   "Gfra1" 
## [17] "Tgfbr2" "Ednra"  "Prokr1" "Bmpr1b"
## 
## $Ionchannels
##  [1] "Kcns3"   "Ano10"   "Kcnip4"  "Kcnip1"  "Kcnn2"   "Kcnn3"   "Ano2"   
##  [8] "Ano8"    "Kcna2"   "Scn1a"   "Kcna5"   "Kcnab1"  "Cacna1i" "Kcnd2"  
## [15] "Kcnv1"   "Cacng5"  "Piezo2"  "Piezo1" 
## 
## $Adhesionmolecules
##  [1] "Ly6c1"   "Itgb5"   "Sema3e"  "Ntn1"    "Slitrk4" "Itga6"   "Cdh8"   
##  [8] "Ptpru"   "Itgb6"   "Unc5b"   "Avil"    "Sema5a"  "Epha8"   "Cdh7"   
## [15] "Itga1"   "Ephb6"   "Flrt2"   "Nxph2"   "Ntng1"  
## 
## $Transcriptionfactors
##  [1] "Satb1"   "Ebf3"    "Bnc2"    "Nfatc1"  "Zfp503"  "Satb2"   "Cux2"   
##  [8] "Dlx3"    "Atoh8"   "Zfp804a" "Ebf2"    "Pbx3"    "Meis1"   "Etv1"   
## [15] "St18"    "Ebf1"    "Neurod6" "Trps1"   "Zfp800"  "Onecut2" "Phox2a" 
## [22] "Phox2b" 
## 
## $AnnexinsCopines
##  [1] "Anxa4"  "Anxa3"  "Anxa11" "Anxa2"  "Anxa5"  "Anxa6"  "Anxa7"  "Cpne4" 
##  [9] "Cpne5"  "Cpne7"  "Cpne2"  "Cpne3"  "Cpne8"
lapply(check.fig2, length)
## $Neurotransmission
## [1] 22
## 
## $Othersignaling
## [1] 20
## 
## $Ionchannels
## [1] 18
## 
## $Adhesionmolecules
## [1] 19
## 
## $Transcriptionfactors
## [1] 22
## 
## $AnnexinsCopines
## [1] 13
names.fig2
## [1] "Neurotransmission"    "Othersignaling"       "Ionchannels"         
## [4] "Adhesionmolecules"    "Transcriptionfactors" "AnnexinsCopines"
DotPlot(GEX.seur, features = check.fig2$Neurotransmission, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1])

DotPlot(GEX.seur, features = check.fig2$Othersignaling, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2])

DotPlot(GEX.seur, features = check.fig2$Ionchannels, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3])

DotPlot(GEX.seur, features = check.fig2$Adhesionmolecules, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4])

DotPlot(GEX.seur, features = check.fig2$Transcriptionfactors, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5])

DotPlot(GEX.seur, features = check.fig2$AnnexinsCopines, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6])

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(0,1,18,  # ENC1
                                            4,6,     # ENC2
                                            9,2,     # ENC3
                                            15,      # ENC4
                                            21,      # ENC5
                                            19,      # ENC6
                                            10,   # ENC7
                                            7,8,       # ENC8
                                            5,12,  # ENC9
                                            11,      # ENC10
                                            17,      # ENC11
                                            13,      # ENC12
                                            
                                            16,      # Glia1
                                            3,       # Glia2
                                            14,      # Glia3
                                            20       # Glia4
                                            ))

GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno[GEX.seur$preAnno %in% c(0,1,18)] <- "ENC1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(4,6)] <- "ENC2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(9,2)] <- "ENC3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(15)] <- "ENC4"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(21)] <- "ENC5"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(19)] <- "ENC6"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(10)] <- "ENC7"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(7,8)] <- "ENC8"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(5,12)] <- "ENC9"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(11)] <- "ENC10"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(17)] <- "ENC11"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(13)] <- "ENC12"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(16)] <- "Glia1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(3)] <- "Glia2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(14)] <- "Glia3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(20)] <- "Glia4"

GEX.seur$preAnno <- factor(GEX.seur$preAnno,
                           levels = c(paste0("ENC",1:12),
                                      paste0("Glia",1:4)))
DotPlot(GEX.seur, features = check.fig2$Neurotransmission, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1])

DotPlot(GEX.seur, features = check.fig2$Othersignaling, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2])

DotPlot(GEX.seur, features = check.fig2$Ionchannels, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3])

DotPlot(GEX.seur, features = check.fig2$Adhesionmolecules, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4])

DotPlot(GEX.seur, features = check.fig2$Transcriptionfactors, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5])

DotPlot(GEX.seur, features = check.fig2$AnnexinsCopines, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6])

cowplot::plot_grid(
DotPlot(GEX.seur, features = check.fig2$Neurotransmission, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(GEX.seur, features = check.fig2$Othersignaling, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(GEX.seur, features = check.fig2$Ionchannels, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(GEX.seur, features = check.fig2$Adhesionmolecules, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(GEX.seur, features = check.fig2$Transcriptionfactors, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(GEX.seur, features = check.fig2$AnnexinsCopines, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)

cowplot::plot_grid(
DotPlot(GEX.seur, features = check.fig2$Neurotransmission, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 11.5))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[1]),

DotPlot(GEX.seur, features = check.fig2$Othersignaling, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+#scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[2]),

DotPlot(GEX.seur, features = check.fig2$Ionchannels, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[3]),

DotPlot(GEX.seur, features = check.fig2$Adhesionmolecules, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[4]),

DotPlot(GEX.seur, features = check.fig2$Transcriptionfactors, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[5]),

DotPlot(GEX.seur, features = check.fig2$AnnexinsCopines, group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 12.15))+ #scale_y_discrete(limits=rev) + 
  labs(title=names.fig2[6]),
ncol = 2)

DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "sort_clusters") + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: Drd2, Csf2rb2

DotPlot(GEX.seur, features = as.vector(unlist(markers.neur)), group.by = "preAnno") + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
## Warning in FetchData.Seurat(object = object, vars = features, cells = cells):
## The following requested variables were not found: Drd2, Csf2rb2

color.pre <- c("#8AB6CE","#678BB1","#3975C1","#4CC1BD",
               "#C03778","#97BA59","#DFAB16","#BF7E6B",
               "#D46B35","#BDAE8D","#BD66C4","#2BA956",
               "#FF8080","#FF8080","#FF8080","#FF0000")

#saveRDS(GEX.seur,"GSE149524.P21_rep2.initial.rds")